
global set more off
set more off

global fdr "Directory Name"


***********************************************************************
******
******    CRSP-TAQ link table provided by WRDS
******
***********************************************************************
clear all
import delimited "$fdr\TAQ_CRSP_link.csv"
sort sym_root date permno
egen t=seq(), by(sym_root date)
keep if t==1
drop t
rename sym_root symbol
sort symbol date
save "$fdr\TAQ_CRSP_link.dta", replace

***********************************************************************
******   Construct Mroib measures from raw data ontained from  ********
******   WRDS Intraday Indicators                              ********
***********************************************************************
clear all
import delimited "$fdr\TAQ_measures.csv"
gen year=floor(date/10000)
gen month=floor((date-year*10000)/100)
replace month=month+12*(year-2010)
gen mroibt=(buynumtrades_retail- sellnumtrades_retail)/( buynumtrades_retail+ sellnumtrades_retail)
gen mroibv=(buyvol_retail-sellvol_retail)/(buyvol_retail+sellvol_retail)
save "$fdr\raw_mroib.dta", replace
sort symbol date
merge 1:1 symbol date using "$fdr\TAQ_CRSP_link.dta"
keep if _merge==3
drop _merge
sort permno date
save "$fdr\raw_mroib.dta", replace

************************************************************************
*****   Constructed weekly Mroib measures following BJZZ after  ********
*****   merging daily Mroib data with raw Daily CRSP            ********
************************************************************************
clear all
import delimited "$fdr\Daily_CRSP_base.csv"
sort permno date
merge 1:1 permno date using "$fdr\raw_mroib.dta"
keep if _merge==3
drop _merge
rename mroibv mroibvol
sort permno date
foreach x in mroibvol{
forvalues y = 1(1)4{
by permno: gen l`y'`x'=`x'[_n-`y']
}
}
egen retailvol_flow=rowmean(mroibvol l1mroibvol l2mroibvol l3mroibvol l4mroibvol)
egen missvol_flow=rowmiss(mroibvol l1mroibvol l2mroibvol l3mroibvol l4mroibvol)
gen retailvol_flow2=retailvol_flow*(5-missvol_flow)
by permno: gen retailvol_flow2_lag1=retailvol_flow2[_n-5]
rename date date5
by permno: gen long date=date5[_n+5]
keep if retailvol_flow~=.
keep permno date date5 retailvol_flow retailvol_flow2 retailvol_flow2_lag1
egen t=seq(), by(permno date)
keep if t==1
drop t
sort permno date
save "$fdr\Weekly_retail.dta", replace

************************************************************************
*****   Constructed weekly spreads and depth measures after     ********
*****   merging daily Mroib data with raw Daily CRSP            ********
************************************************************************
clear all
import delimited "$fdr\Daily_CRSP_base.csv"
sort permno date
merge 1:1 permno date using "$fdr\raw_mroib.dta"
keep if _merge==3
drop _merge
keep permno date quotedspread_percent_tw bestofrdepth_share_tw bestbiddepth_share_tw
rename quotedspread_percent_tw qsp_per
rename bestofrdepth_share_tw depth_ask
rename bestbiddepth_share_tw depth_bid
sort permno date
foreach x in qsp_per depth_ask depth_bid{
forvalues y = 1(1)4{
by permno: gen l`y'`x'=`x'[_n-`y']
}
}
egen qspper_week=rowmean(qsp_per l1qsp_per l2qsp_per l3qsp_per l4qsp_per)
egen depthask_week=rowmean(depth_ask l1depth_ask l2depth_ask l3depth_ask l4depth_ask)
egen depthbid_week=rowmean(depth_bid l1depth_bid l2depth_bid l3depth_bid l4depth_bid)
sort permno date
rename date date5
by permno: gen long date=date5[_n+5]
keep if qspper_week~=.
egen t=seq(), by(permno date)
keep if t==1
drop t
sort permno date
save "$fdr\Weekly_Spreads.dta"

************************************************************************
*****  Users first need to construct daily volumes and trade counts ****
*****  for transactions that occur off-exchange and at the midpoint ****
*****  This can be done using the SAS code "Large_TRF_2013.sas";    ****
*****  however, the code must be run for each stock year separately ****
*****  as facilitated by the %macro do merge() function in the code.****
*****  The STATA file "TRF_large.dta", appending the outputs of this****
*****  program across stocks and years, should be merged with Daily ****
*****  CRSP data before weekly observations are constructed         ****
************************************************************************ 

clear all
import delimited "$fdr\Daily_CRSP_base.csv"
destring ret, force replace
replace prc=abs(prc)
sort permno date
merge 1:1 permno date using "$fdr\TRF_large.dta"
drop if _merge==2
drop _merge
sort permno date
foreach x in inst_mp_trd inst_mp_vol offex_trd offex_vol ins_nbbo_trd ins_nbbo_vol{
replace `x'=0 if `x'==.
forvalues y = 1(1)4{
by permno: gen l`y'`x'=`x'[_n-`y']
}
}
foreach x in inst_mp_trd inst_mp_vol offex_trd offex_vol ins_nbbo_trd ins_nbbo_vol{
gen `x'_5=`x'+l1`x'+l2`x'+l3`x'+l4`x'
}
rename date date5
by permno: gen long date=date5[_n+5]
keep if inst_mp_vol_5~=.
keep permno date date5 inst_mp_trd_5 inst_mp_vol_5 offex_trd_5 offex_vol_5 ins_nbbo_trd_5 ins_nbbo_vol_5
egen t=seq(), by(permno date)
keep if t==1
drop t
sort permno date
save "$fdr\Weekly_TRFlarge.dta", replace

***********************************************************************
***** Raw ANcerno data is aggregated across managers by stock-day *****
***** to obtain measures of institutional flow and price impacts  *****
***********************************************************************
*Step 1: aggregat institutional trade data at the stock-day level
clear all
use "$fdr\TAQ_CRSP_link.dta"
sort cusip date
save "$fdr\TAQ_CRSP_link.dta", replace
clear all
use "$fdr\ANcerno_raw.dta"
qui: gen double date2=dofc(tradedate)
qui: format %td date2
qui: gen year=year(date2)
qui: gen month=month(date2)
qui: gen day=day(date2)
qui: gen double date=year*10000+month*100+day
*total institutional sell volume
qui: egen temp=sum(volume) if side==-1, by(stockkey date)
qui: egen insvol_sell=mean(temp), by(stockkey date)
qui: drop temp
*total institutional buy volume
qui: egen temp=sum(volume) if side==1, by(stockkey date)
qui: egen insvol_buy=mean(temp), by(stockkey date)
qui: drop temp
*volume-weighted average execution price of sell trades
qui: gen temp=xp*volume/insvol_sell if side==-1
qui: egen temp2=sum(temp) if side==-1, by(stockkey date)
qui: egen meanxp_sell=mean(temp2), by(stockkey date)
qui: drop temp temp2
*volume-weighted average execution price of buy trades
qui: gen temp=xp*volume/insvol_buy if side==1
qui: egen temp2=sum(temp) if side==1, by(stockkey date)
qui: egen meanxp_buy=mean(temp2), by(stockkey date)
qui: drop temp temp2
replace insvol_buy=0 if insvol_buy==.
replace insvol_sell=0 if insvol_sell==.
qui: egen t=seq() if insvol_buy~=. & insvol_sell~=., by(stockkey date)
keep if t==1
keep symbol cusip dpo dpc dpoc date year month day date2 insvol_sell insvol_buy meanxp_sell meanxp_buy
sort cusip date
merge 1:1 cusip date using "$fdr\TAQ_CRSP_link.dta"
keep if _merge==3
drop _merge
*calculate daily istitutional trade imbalances and price impacts (in bps)
gen mioibvol=( insvol_buy- insvol_sell)/( insvol_buy+ insvol_sell)
replace meanxp_buy=. if meanxp_buy==0
replace meanxp_sell=. if meanxp_sell==0
gen temps= (dpo-meanxp_sell)/dpo*10000
gen tempb= (meanxp_buy-dpo)/dpo*10000
gen tempsb=( insvol_sell* temps+ insvol_buy* tempb)/(insvol_buy + insvol_sell)
gen tc=temps
replace tc=tempb if temps==. & tempb~=.
replace tc=tempsb if tempb~=. & temps~=.
drop temps tempb tempsb
*price impact per $1,000
gen temps= ((dpo-meanxp_sell)/dpo*10000)/(insvol_sell*dpo)*1000
gen tempb= ((meanxp_buy-dpo)/dpo*10000)/(insvol_buy*dpo)*1000
gen tempsb=( insvol_sell* temps+ insvol_buy* tempb)/(insvol_buy + insvol_sell)
gen tc_ps=temps
replace tc_ps=tempb if temps==. & tempb~=.
replace tc_ps=tempsb if tempb~=. & temps~=.
drop temps tempb tempsb
gen insvol_total=insvol_buy+insvol_sell
keep permno date mioibvol tc tc_ps insvol_total insvol_buy insvol_sell
sort permno date
save "$fdr\AN_institutional.dta", replace

*Construct weekly institutional trade data
clear all
import delimited "$fdr\Daily_CRSP_base.csv"
destring ret, force replace
replace prc=abs(prc)
sort permno date
merge 1:1 permno date using "$fdr\AN_institutional.dta"
drop if _merge==2
drop _merge
sort permno date
foreach x in mioibvol tc tc_ps{
forvalues y = 1(1)4{
by permno: gen l`y'`x'=`x'[_n-`y']
}
}
egen instvol_flow=rowmean(mioibvol l1mioibvol l2mioibvol l3mioibvol l4mioibvol)
egen instvol_tc=rowmean(tc l1tc l2tc l3tc l4tc)
egen instvol_tc_ps=rowmean(tc_ps l1tc_ps l2tc_ps l3tc_ps l4tc_ps)
egen missvol_flow=rowmiss(mioibvol l1mioibvol l2mioibvol l3mioibvol l4mioibvol)
gen instvol_flow2=instvol_flow*(5-missvol_flow)
sort permno date
rename date date5
by permno: gen long date=date5[_n+5]
keep if instvol_flow~=.
keep permno date date5 instvol_flow instvol_flow2 instvol_tc instvol_tc_ps
egen t=seq(), by(permno date)
keep if t==1
drop t
sort permno date
save "E:\Retail_V_Institutional\Data\ANcerno_institutional\Weekly_institutional.dta", replace

*************************************************************************
**** Construct weekly (non-overlapping) current and future returns ******
**** following BJZZ. Extend the horizon up to 36 weeks forward     ******
*************************************************************************
clear all
import delimited "$fdr\Daily_CRSP_returns.csv"
destring ret, force replace
replace prc=abs(prc)
replace openprc=abs(openprc)
sort permno date
by permno: gen adj_factor=(1+ret)/(prc/prc[_n-1])
gen mprc=(ask+bid)/2
by permno: gen mret=(mprc/mprc[_n-1])*adj_factor-1
by permno: gen ret_week=ln(1+ret)+ln(1+ret[_n-1])+ln(1+ret[_n-2])+ln(1+ret[_n-3])+ln(1+ret[_n-4])
by permno: gen mret_week=ln(1+mret)+ln(1+mret[_n-1])+ln(1+mret[_n-2])+ln(1+mret[_n-3])+ln(1+mret[_n-4])
gen idret=mprc/openprc-1
gen onret=exp(ln(1+mret)-ln(1+idret))-1
foreach x in idret onret{
by permno: gen `x'_week=ln(1+`x')+ln(1+`x'[_n-1])+ln(1+`x'[_n-2])+ln(1+`x'[_n-3])+ln(1+`x'[_n-4])
}
gen year=floor(date/10000)
gen month=floor((date-year*10000)/100)
replace month=month+12*(year-2010)
foreach x in ret mret onret idret{
by permno: gen `x'_week_lag1=`x'_week[_n-5]
}
foreach z in 1 2 5 8 11 23 35{
foreach x in ret mret idret onret{
by permno: gen `x'_week_lead`z'=`x'_week[_n+(`z'*5)]
}
}
sort permno date
save "$fdr\Weekly_return.dta", replace

*******************************************************************************
****                          Figure 1: Panels A & B                    *******
****                      Table 1 without short interest                *******
*******************************************************************************
clear all
use "$fdr\Weekly_return.dta"
sort permno date
merge 1:1 permno date using "$fdr\Weekly_retail.dta"
keep if _merge==3
drop _merge
sort permno date
merge 1:1 permno date using "E:\Retail_V_Institutional\Data\ANcerno_institutional\Weekly_institutional.dta"
drop if _merge==2
drop _merge
sort permno date
merge 1:1 permno date using "$fdr\Weekly_Spreads.dta"
keep if _merge==3
drop _merge
sort permno date
merge 1:1 permno date using "$fdr\Weekly_TRFlarge.dta"
keep if _merge==3
drop _merge
gen sample0=1 if qspper_week~=. & depthask_week~=. & depthbid_week~=. & mret_week~=. & onret_week~=. & idret_week~=. & retailvol_flow2~=. & mret_week_lag1~=.
foreach x in 1 2 5 8 11 23 35{
replace sample0=. if mret_week_lead`x'==.
replace sample0=. if onret_week_lead`x'==.
replace sample0=. if idret_week_lead`x'==.
}
keep if sample0==1
sort date retailvol_flow2
egen rank=seq() if sample0==1, by(date)
egen count=count(retailvol_flow2) if sample0==1, by(date)
gen retailvol_flow2_pc=rank/count
gen retailvol_flow2_decile=ceil(retailvol_flow2_pc*10)
drop rank count
egen mkt_insflow=mean(instvol_flow2), by(date)
gen instvol_flow2_ma=instvol_flow2-mkt_insflow
*******************************
****** Figure 1, Panel A ******
egen mean_mainstflow=mean(instvol_flow2_ma), by(date retailvol_flow2_decile)
egen t=seq() if mean_mainstflow~=., by(date retailvol_flow2_decile)
egen ave_mean_mainstflow=mean(mean_mainstflow) if t==1, by(retailvol_flow2_decile)
egen z=seq() if ave_mean_mainstflow~=., by(retailvol_flow2_decile)
twoway (connected ave_mean_mainstflow retailvol_flow2_decile if z==1, sort xtitle("Retail trade imbalabce (deciles)") ytitle("Institutional trade imbalance (market-adjusted)"))
drop mean_mainstflow t ave_mean_mainstflow z
*******************************
****** Figure 1, Panel B ******
egen mean_primp=mean(instvol_tc_ps), by(date retailvol_flow2_decile)
egen t=seq() if mean_primp~=., by(date retailvol_flow2_decile)
egen ave_mean_primp=mean(mean_primp) if t==1, by(retailvol_flow2_decile)
egen z=seq() if ave_mean_primp~=., by(retailvol_flow2_decile)
twoway (connected ave_mean_primp retailvol_flow2_decile if z==1, sort xtitle("Retail trade imbalabce (deciles)") ytitle("Institutional price impact (pbs)"))
drop mean_primp t ave_mean_primp z
*******************************
egen mean_inst_mpvol=mean(inst_mp_vol_5), by(permno)
gen inst_mpvol_norm=inst_mp_vol_5/ mean_inst_mpvol
replace inst_mpvol_norm=0 if inst_mpvol_norm==.
gen depth_week=(depthask_week+depthbid_week)/2
winsor2 inst_mpvol_norm qspper_week instvol_flow2 instvol_flow2_ma depth_week instvol_tc_ps mret_week_lag1 onret_week_lag1 idret_week_lag1, by(retailvol_flow2_decile date) c(5 95)
foreach x in instvol_tc_ps mret_week_lag1 onret_week_lag1 idret_week_lag1{
	replace `x'_w=`x'_w*100
}
replace qspper_week_w=qspper_week_w*10000
foreach x in depth_week_w{
	replace `x'=`x'/100
}
label variable inst_mpvol_norm "Off-exchange midpoint"
label variable qspper_week "Relative spread (bps)"
label variable depth_week "Average depth"
label variable instvol_flow2 "Institutional imbalance (raw)"
label variable instvol_flow2_ma "Institutional imbalance (adjusted)"
label variable instvol_tc_ps "Price impact"
label variable mret_week_lag1 "Close-to-close return (%)"
label variable onret_week_lag1 "Intraday return (%)"
label variable idret_week_lag1 "Overnight return (%)"
eststo clear
foreach x in inst_mpvol_norm qspper_week depth_week instvol_flow2 instvol_flow2_ma instvol_tc_ps mret_week_lag1 onret_week_lag1 idret_week_lag1{
	qui: xi: reghdfe `x'_w i.retailvol_flow2_decile, absorb(permno date) cluster(permno date)
	quietly eststo: xlincom margin1 = _b[_cons], repost
***** For actual replication must use i = 2(1)10 - this is not feasible due to sample size of the psudo data
	forvalues i = 3(1)10{
		quietly eststo `x'`i': xlincom margin`i' = _b[_cons]+_b[ _Iretailvol_`i'], repost
	}
} 
esttab inst_mpvol_norm10 qspper_week10 depth_week10 instvol_flow210 instvol_flow2_ma10 instvol_tc_ps10 mret_week_lag110 onret_week_lag110 idret_week_lag110, label keep(margin1 margin2 margin3 margin4 margin5 margin6 margin7 margin8 margin9 margin10) order(margin1 margin2 margin3 margin4 margin5 margin6 margin7 margin8 margin9 margin10) t brackets nostar cells(b(fmt(a2)) ci(par fmt(a2)))

*******************************************************************************
****                   Table 1: code for short interest                 *******
****  Since changes in short interest are only available at biweekly    *******
****  frequencies, Mroib must be aggregated over non-overlapping 2-week *******
****  intervals before its link with short inerest can be estimated.    *******
*******************************************************************************
*merge FINRA short interest data with TAQ/CRSP identifiers
clear all
use "$fdr\TAQ_CRSP_link.dta"
sort gvkey date
save "$fdr\TAQ_CRSP_link.dta", replace
clear all
import delimited "$fdr\Short_interest_raw.csv"
rename datadate date
gen year=floor(date/10000)
sort gvkey date
by gvkey: gen dsi= (shortintadj/ shortintadj[_n-1]-1)*100
sort gvkey date
merge gvkey date using "$fdr\TAQ_CRSP_link.dta"
keep if _merge ==3
drop _merge
egen t=seq(), by(gvkey date)
keep if t==1
drop t
sort permno date
save "$fdr\Biweekly_DSI.dta", replace
*Merge Daily CRSP with biweekly short interest data
clear all
import delimited "$fdr\Daily_CRSP_base.csv"
destring ret, force replace
replace prc=abs(prc)
sort permno date
merge 1:1 permno date using "$fdr\raw_mroib.dta"
drop if _merge==2
drop _merge
rename mroibv mroibvol
gen merge=1 if mroibvol~=.
gen year=floor(date/10000)
egen max_merge=max(merge), by(permno year)
keep if max_merge==1
replace mroibvol=0 if mroibvol==.
sort permno date
merge 1:1 permno date using "$fdr\Biweekly_DSI.dta"
drop if _merge==2
sort permno date
egen siweek=seq() if _merge==3, by(permno)
by permno: replace siweek=siweek[_n-1] if siweek==.
egen biweek_mroibvol=sum(mroibvol), by(permno siweek)
by permno: replace biweek_mroibvol=biweek_mroibvol[_n-1] if _merge==3
keep if _merge ==3
drop _merge
*construct deciled of bi-weekly Mroib
sort date biweek_mroibvol
egen rank=seq() if biweek_mroibvol~=., by(date)
egen count=count(biweek_mroibvol) if biweek_mroibvol~=., by(date)
gen biweek_mroibvol_pc=rank/count
gen biweek_mroibvol_decile=ceil(biweek_mroibvol_pc*10)
drop rank count
winsor2 dsi, by(date biweek_mroibvol_decile) c(5 95)
foreach x in dsi{
eststo clear
	qui: xi: reghdfe `x'_w i.biweek_mroibvol_decile, absorb(permno date) cluster(permno date)
	quietly eststo: xlincom margin1 = _b[_cons], repost
***** For actual replication must use i = 2(1)10 - this is not feasible due to sample size of the psudo data
	forvalues i = 3(1)10{
		quietly eststo `x'`i': xlincom margin`i' = _b[_cons]+_b[_Ibiweek_mr_`i'], repost
	}
esttab `x'10, label keep(margin1 margin2 margin3 margin4 margin5 margin6 margin7 margin8 margin9 margin10) order(margin1 margin2 margin3 margin4 margin5 margin6 margin7 margin8 margin9 margin10) t brackets nostar cells(b(fmt(a2)) ci(par fmt(a2)))
} 

*******************************************************************************
****                   Table 2: Mroib and future returns                *******
*******************************************************************************
clear all
use "$fdr\Weekly_return.dta"
sort permno date
merge 1:1 permno date using "$fdr\Weekly_retail.dta"
keep if _merge==3
drop _merge
sort permno date
merge 1:1 permno date using "E:\Retail_V_Institutional\Data\ANcerno_institutional\Weekly_institutional.dta"
drop if _merge==2
drop _merge
sort permno date
merge 1:1 permno date using "$fdr\Weekly_Spreads.dta"
keep if _merge==3
drop _merge
sort permno date
merge 1:1 permno date using "$fdr\Weekly_TRFlarge.dta"
keep if _merge==3
drop _merge
gen sample0=1 if qspper_week~=. & depthask_week~=. & depthbid_week~=. & mret_week~=. & onret_week~=. & idret_week~=. & retailvol_flow2~=. & mret_week_lag1~=.
foreach x in 1 2 5 8 11 23 35{
replace sample0=. if mret_week_lead`x'==.
replace sample0=. if onret_week_lead`x'==.
replace sample0=. if idret_week_lead`x'==.
}
keep if sample0==1
sort date retailvol_flow2
egen rank=seq() if sample0==1, by(date)
egen count=count(retailvol_flow2) if sample0==1, by(date)
gen retailvol_flow2_pc=rank/count
gen retailvol_flow2_decile=ceil(retailvol_flow2_pc*10)
drop rank count
egen prmean_mret_week=mean(mret_week), by(date retailvol_flow2_decile)
egen temp=mean(mret_week), by(date)
gen ma_mret_week=mret_week-temp
drop temp
foreach x in 1 2 5 8 11 23 35{
egen prmean_mret_week_lead`x'=mean(mret_week_lead`x'), by(date retailvol_flow2_decile)
egen temp=mean(mret_week_lead`x'), by(date)
gen ma_mret_week_lead`x'=mret_week_lead`x'-temp
drop temp
}
egen t=seq() if prmean_mret_week~=., by(date retailvol_flow2_decile)
table retailvol_flow2_decile if t==1, statistic(mean prmean_mret_week prmean_mret_week_lead1 prmean_mret_week_lead2 prmean_mret_week_lead5 prmean_mret_week_lead8 prmean_mret_week_lead11 prmean_mret_week_lead23 prmean_mret_week_lead35)
table retailvol_flow2_decile if t==1, statistic(mean ma_mret_week ma_mret_week_lead1 ma_mret_week_lead2 ma_mret_week_lead5 ma_mret_week_lead8 ma_mret_week_lead11 ma_mret_week_lead23 ma_mret_week_lead35)

***************************************************************
*********       Table 5: Asset Pricing Evidence       *********
*** Generating this table requires data fram various sources **
*** detailed below. Moreover, the psudo data contains stocks **
*** whose previous month'end's closing price exceed $2. To   **
*** replicate results, users should impose the relevant      **
*** share price filters, i.e., $1, $2, or $5.                **
***************************************************************
*1) Liquidity Measures obtained from WRDS Intraday Indicators
***************************************************************
clear all
use "$fdr\raw_mroib.dta"
rename quotedspread_dollar_tw dqsp_tw
rename quotedspread_percent_tw pqsp_tw
rename bestofrdepth_dollar_tw dad_tw
rename bestbiddepth_dollar_tw dbd_tw
rename bestofrdepth_share_tw sad_tw
rename bestbiddepth_share_tw sbd_tw
rename effectivespread_dollar_dw desp_dw
rename effectivespread_dollar_sw desp_sw
rename effectivespread_percent_dw pesp_dw
rename effectivespread_percent_sw pesp_sw
rename dollarrealizedspread_lr_sw drsp_sw
rename dollarrealizedspread_lr_dw drsp_dw
rename percentrealizedspread_lr_sw prsp_sw
rename percentrealizedspread_lr_dw prsp_dw
rename bs_ratio_retail_num ilm_t
rename bs_ratio_retail_vol ilm_v
gen dprimp_dw=desp_dw-drsp_dw
gen pprimp_dw=pesp_dw-prsp_dw
gen dprimp_sw=desp_sw-drsp_sw
gen pprimp_sw=pesp_sw-prsp_sw
foreach x in dqsp_tw pqsp_tw dad_tw dbd_tw sad_tw sbd_tw desp_dw desp_sw pesp_dw pesp_sw drsp_sw drsp_dw prsp_sw prsp_dw dprimp_dw pprimp_dw dprimp_sw pprimp_sw ilm_t ilm_v{
egen mean_`x'=mean(`x'), by(permno month)
egen count_`x'=count(`x') if `x'~=., by(permno month)
}
egen t=seq() if mean_dqsp_tw~=. & mean_pqsp_tw~=. & mean_dad_tw~=. & mean_dbd_tw~=. & mean_sad_tw~=. & mean_sbd_tw~=. & mean_desp_dw~=. & mean_desp_sw~=. & mean_pesp_dw~=. & mean_pesp_sw~=. & mean_drsp_sw~=. & mean_drsp_dw~=. & mean_prsp_sw~=. & mean_prsp_dw~=. & mean_ilm_t~=. & mean_ilm_v~=., by(permno month)
keep if t==1
gen mean_dd_tw=(mean_dad_tw+mean_dbd_tw)/2
gen mean_sd_tw=(mean_sad_tw+mean_sbd_tw)/2
keep permno month year mean_dqsp_tw mean_pqsp_tw mean_desp_dw mean_desp_sw mean_pesp_dw mean_pesp_sw mean_drsp_sw mean_drsp_dw mean_prsp_sw mean_prsp_dw mean_ilm_t mean_ilm_v mean_dd_tw mean_sd_tw mean_dprimp_dw mean_pprimp_dw mean_dprimp_sw mean_pprimp_sw
rename month month_act
gen month=month_act+2
sort permno month
save "$fdr\liquidity_measures.dta", replace
****************************************************************
*2) Insitutional price impacts obtained from ANcerno
****************************************************************
clear all
use "$fdr\AN_institutional.dta"
gen year=floor(date/10000)
gen month=floor((date-year*10000)/100)
replace month=month+12*(year-2010)
gen temp=tc_ps*insvol_total
egen temp_tcps=total(temp), by(permno month)
egen temp_vol=total(insvol_total), by(permno month)
gen mean_pi=temp_tcps/temp_vol
egen t=seq(), by(permno month)
keep if t==1
keep permno month mean_pi
rename month month_act
gen month=month_act+2
sort permno month
save "$fdr\liquidity_ANpi.dta", replace
*********************************************************************
*3) Kyle's Lambda estimated from Trades & Quotes data              **
** Note: Monthly estimates of Kyle's Lambda are constructed using  **
** the SAS code "spreads_lambda.SAS". This program takes the file  **
** "ticker_list.txt" and "year" as inputs and estimates various    **
** liquidity measures. Users should run this code for each year    **
** separtely before appending monthly files into a single file     **
** containing estimates for all stock/years. The file              **
** "Lambda_base.csv" is designed as such                           **
*********************************************************************
clear all
use "$fdr\TAQ_CRSP_link.dta"
gen year=floor(date/10000)
gen month=floor((date-year*10000)/100)
replace month=month+12*(year-2010)
sort symbol date
egen t=seq() if symbol ~="", by(symbol month)
keep if t==1
drop date t
sort symbol month
save "$fdr\Monthly_TAQ_CRSP_link.dta", replace
clear all
import delimited "$fdr\Lambda_base.csv"
keep totalsum ticker month year
rename ticker symbol
rename totalsum lambda
replace lambda=lambda*1000
replace month=month+12*(year-2010)
egen t=seq() if lambda~=., by(symbol month)
keep if t==1
drop t
sort symbol month
merge 1:1 symbol month using "$fdr\Monthly_TAQ_CRSP_link.dta"
keep if _merge==3
drop _merge
sort permno month
rename lambda mean_lambda2
keep symbol month year permno cusip ncusip mean_lambda2
rename month month_act
gen month=month_act+2
egen t=seq() if mean_lambda2~=., by(permno month)
keep if t==1
drop t
sort permno month
save "$fdr\liquidity_lambda.dta", replace
**********************************************************
*4) Roll, AMVIST, and Amihud measures
**********************************************************
clear all
import delimited "$fdr\Daily_CRSP_base.csv"
replace prc=abs(prc)
gen year=floor(date/10000)
gen month=floor((date-year*10000)/100)
replace month=month+12*(year-2010)
*Roll
sort permno date
by permno: gen lret=ret[_n-1] if permno==permno[_n-1]
sort permno month date
egen meanret=mean(ret), by(permno month)
egen meanlret=mean(lret), by(permno month)
egen count=count(ret) if ret~=. & lret~=., by(permno month)
gen dev=(ret-meanret) if ret~=. & lret~=.
gen ldev=(lret-meanlret) if ret~=. & lret~=.
gen prod=dev*ldev
egen sumprod=total(prod) if ret~=. & lret~=., by(permno month)
gen mean_roll=sumprod/(count-1)
*AMVIST
gen amt=(abs(ret)/(vol/(shrout*1000)))/1000
egen mean_amvist=mean(amt), by(permno month)
*Amihud - close-to-clos (CCAM) and open-to-close (OCAM)
gen ccam=(abs(ret)/(prc*vol))*1000
gen idret=prc/openprc-1
gen ocam=(abs(idret)/(prc*vol))*1000
egen mean_ccam=mean(ccam), by(permno month)
egen mean_ocam=mean(ocam), by(permno month)
egen t=seq() if mean_roll~=. & mean_amvist~=. & mean_ccam~=. & mean_ocam~=., by(permno month)
keep if t==1
drop t
rename month month_act
gen month=month_act+2
sort permno month
save "$fdr\liquidity_lowfreq.dta", replace
**********************************************************
*5) Trade-time measures of liquidity obtained from     ***
*   Yashar Barardehi's website                         ***
**********************************************************
clear all
import delimited "$fdr\BBD_raw.csv"
gen month=month_act+12*(year-2010)
replace month=month+2
keep if bbd~=. & wbbd~=.
rename bbd mean_bbd
rename wbbd mean_wbbd
keep permno month month_act year mean_bbd mean_wbbd
sort permno month
save "$fdr\liquidity_bbd.dta", replace
********************************************************************
***                     Asset pricing tests                      ***
*** The file "CRSP_pricing.dta" contains monthly observations on ***
*** stock returns along with all the stock characteristics       ***
*** detailed in Appendix A.                                      ***
********************************************************************
clear all
use "$fdr\CRSP_pricing.dta"
sort permno month
merge 1:1 permno month using "$fdr\liquidity_measures.dta"
keep if _merge==3
drop _merge
sort permno month
merge 1:1 permno month using "$fdr\liquidity_ANpi.dta"
*Note 1: "_merge==2" is used to reflect that the actual ANcerno data might have significant coverage gaps, as is the case in psudo data
*Note 2: To ensure there are enough observation per month despite coverage gaps, this core sets "mean_pi" equal to 0, when it is missing
drop if _merge==2
drop _merge
replace mean_pi=0 if mean_pi==.
sort permno month
merge 1:1 permno month using "$fdr\liquidity_lambda.dta"
keep if _merge==3
drop _merge
sort permno month
merge 1:1 permno month using "$fdr\liquidity_lowfreq.dta", force
keep if _merge==3
drop _merge
sort permno month
merge 1:1 permno month using "$fdr\liquidity_bbd.dta"
keep if _merge==3
drop _merge
gen retrf=(ret-rf)*100
replace ret=ret*100
replace idvol_ret=idvol_ret*100
xtset permno month

*************************** Table 5 - Panel A ***************************
eststo clear
foreach x in pi dqsp_tw sd_tw desp_sw drsp_sw dprimp_sw lambda2 amvist roll ccam ocam bbd wbbd ilm_v{

qui: gen liquidity=mean_`x'

quietly eststo `x': asreg retrf liquidity b_mkt b_hml b_smb bktomkt lnmcap_12 divyd_12 idvol_ret l1ret l212ret, fmb newey(6)

drop liquidity

}
esttab, label keep(_cons liquidity b_mkt b_hml b_smb bktomkt lnmcap_12 divyd_12 idvol_ret l1ret l212ret) order(_cons liquidity b_mkt b_hml b_smb bktomkt lnmcap_12 divyd_12 idvol_ret l1ret l212ret) t brackets star(* 0.10 ** 0.05 *** 0.01) cells(b(star fmt(a2)) t(par fmt(2)))

************************** Table 5 - Panel B ****************************
* FMB - ORTHAGONOLIZED: orthogonalize with respect to each exisintg     *
* measure, and then use the residual to explain expected returns        *
*************************************************************************
foreach x in ilm_v{
eststo clear
qui: gen liquidity=mean_`x'
foreach y in mean_pi mean_dqsp_tw mean_sd_tw mean_desp_sw mean_drsp_sw mean_dprimp_sw mean_lambda2 mean_amvist mean_roll mean_ccam mean_ocam mean_bbd mean_wbbd{		
		quietly: asreg liquidity `y' if liquidity~=. & `y'~=., by(month) fitted
		qui: drop _Nobs _R2 _adjR2 _b_`y' _b_cons _fitted
		rename _residuals temp
		quietly eststo: asreg retrf temp b_mkt b_hml b_smb bktomkt lnmcap_12 divyd_12 idvol_ret l1ret l212ret if liquidity~=. & `y'~=., fmb newey(6)
		drop temp		
}

esttab, label keep(temp) t brackets star(* 0.10 ** 0.05 *** 0.01) cells(b(star fmt(a2)) t(par fmt(2)))
drop liquidity
}


***** END *****















